{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.1.分类数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p>表示分类变量最常用的数据方法是使用<strong>one-hot编码或N取一编码</strong>,也叫虚拟变量.其背后的思想是将一个分类变量替换为一个或多个特征变量,新特征取值为1或0</p>\n",
    "\n",
    "### 使用pandas将数据转化为分类变量one-hot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>workclass</th>\n",
       "      <th>education</th>\n",
       "      <th>gender</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>occupation</th>\n",
       "      <th>income</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>State-gov</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Adm-clerical</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>Self-emp-not-inc</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Male</td>\n",
       "      <td>13</td>\n",
       "      <td>Exec-managerial</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>Private</td>\n",
       "      <td>HS-grad</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53</td>\n",
       "      <td>Private</td>\n",
       "      <td>11th</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>Handlers-cleaners</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>Private</td>\n",
       "      <td>Bachelors</td>\n",
       "      <td>Female</td>\n",
       "      <td>40</td>\n",
       "      <td>Prof-specialty</td>\n",
       "      <td>&lt;=50K</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age          workclass   education   gender  hours-per-week  \\\n",
       "0   39          State-gov   Bachelors     Male              40   \n",
       "1   50   Self-emp-not-inc   Bachelors     Male              13   \n",
       "2   38            Private     HS-grad     Male              40   \n",
       "3   53            Private        11th     Male              40   \n",
       "4   28            Private   Bachelors   Female              40   \n",
       "\n",
       "           occupation  income  \n",
       "0        Adm-clerical   <=50K  \n",
       "1     Exec-managerial   <=50K  \n",
       "2   Handlers-cleaners   <=50K  \n",
       "3   Handlers-cleaners   <=50K  \n",
       "4      Prof-specialty   <=50K  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import display\n",
    "data = pd.read_csv(\"F:/python学习资料/adult.csv\",header=None,index_col=False,\n",
    "                  names=['age','workclass','fnlwgt','education','education-num','marital-status','occupation','relationship',\n",
    "                       'race','gender','capital-gain','capital-loss','hours-per-week','native-country','income'])\n",
    "data = data[['age','workclass','education','gender','hours-per-week','occupation','income']]\n",
    "display(data.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Male      21790\n",
      " Female    10771\n",
      "Name: gender, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 检查字符串编码的分类数据\n",
    "print(data.gender.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "初始特征:\n",
      "['age', 'workclass', 'education', 'gender', 'hours-per-week', 'occupation', 'income']\n",
      "ont-hot编码后:\n",
      "['age', 'hours-per-week', 'workclass_ ?', 'workclass_ Federal-gov', 'workclass_ Local-gov', 'workclass_ Never-worked', 'workclass_ Private', 'workclass_ Self-emp-inc', 'workclass_ Self-emp-not-inc', 'workclass_ State-gov', 'workclass_ Without-pay', 'education_ 10th', 'education_ 11th', 'education_ 12th', 'education_ 1st-4th', 'education_ 5th-6th', 'education_ 7th-8th', 'education_ 9th', 'education_ Assoc-acdm', 'education_ Assoc-voc', 'education_ Bachelors', 'education_ Doctorate', 'education_ HS-grad', 'education_ Masters', 'education_ Preschool', 'education_ Prof-school', 'education_ Some-college', 'gender_ Female', 'gender_ Male', 'occupation_ ?', 'occupation_ Adm-clerical', 'occupation_ Armed-Forces', 'occupation_ Craft-repair', 'occupation_ Exec-managerial', 'occupation_ Farming-fishing', 'occupation_ Handlers-cleaners', 'occupation_ Machine-op-inspct', 'occupation_ Other-service', 'occupation_ Priv-house-serv', 'occupation_ Prof-specialty', 'occupation_ Protective-serv', 'occupation_ Sales', 'occupation_ Tech-support', 'occupation_ Transport-moving', 'income_ <=50K', 'income_ >50K']\n"
     ]
    }
   ],
   "source": [
    "# 使用get_dummies函数做编码\n",
    "print(f\"初始特征:\\n{list(data.columns)}\")\n",
    "data_dummies=pd.get_dummies(data)\n",
    "print(f\"ont-hot编码后:\\n{list(data_dummies.columns)}\")\n",
    "# 可见连续特征age和hours-per-week没有发生变化,而每个分类特征的可能取值都被扩展为一个新特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>hours-per-week</th>\n",
       "      <th>workclass_ ?</th>\n",
       "      <th>workclass_ Federal-gov</th>\n",
       "      <th>workclass_ Local-gov</th>\n",
       "      <th>workclass_ Never-worked</th>\n",
       "      <th>workclass_ Private</th>\n",
       "      <th>workclass_ Self-emp-inc</th>\n",
       "      <th>workclass_ Self-emp-not-inc</th>\n",
       "      <th>workclass_ State-gov</th>\n",
       "      <th>...</th>\n",
       "      <th>occupation_ Machine-op-inspct</th>\n",
       "      <th>occupation_ Other-service</th>\n",
       "      <th>occupation_ Priv-house-serv</th>\n",
       "      <th>occupation_ Prof-specialty</th>\n",
       "      <th>occupation_ Protective-serv</th>\n",
       "      <th>occupation_ Sales</th>\n",
       "      <th>occupation_ Tech-support</th>\n",
       "      <th>occupation_ Transport-moving</th>\n",
       "      <th>income_ &lt;=50K</th>\n",
       "      <th>income_ &gt;50K</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>39</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>50</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>53</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>28</td>\n",
       "      <td>40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 46 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   age  hours-per-week  workclass_ ?  workclass_ Federal-gov  \\\n",
       "0   39              40             0                       0   \n",
       "1   50              13             0                       0   \n",
       "2   38              40             0                       0   \n",
       "3   53              40             0                       0   \n",
       "4   28              40             0                       0   \n",
       "\n",
       "   workclass_ Local-gov  workclass_ Never-worked  workclass_ Private  \\\n",
       "0                     0                        0                   0   \n",
       "1                     0                        0                   0   \n",
       "2                     0                        0                   1   \n",
       "3                     0                        0                   1   \n",
       "4                     0                        0                   1   \n",
       "\n",
       "   workclass_ Self-emp-inc  workclass_ Self-emp-not-inc  workclass_ State-gov  \\\n",
       "0                        0                            0                     1   \n",
       "1                        0                            1                     0   \n",
       "2                        0                            0                     0   \n",
       "3                        0                            0                     0   \n",
       "4                        0                            0                     0   \n",
       "\n",
       "   ...  occupation_ Machine-op-inspct  occupation_ Other-service  \\\n",
       "0  ...                              0                          0   \n",
       "1  ...                              0                          0   \n",
       "2  ...                              0                          0   \n",
       "3  ...                              0                          0   \n",
       "4  ...                              0                          0   \n",
       "\n",
       "   occupation_ Priv-house-serv  occupation_ Prof-specialty  \\\n",
       "0                            0                           0   \n",
       "1                            0                           0   \n",
       "2                            0                           0   \n",
       "3                            0                           0   \n",
       "4                            0                           1   \n",
       "\n",
       "   occupation_ Protective-serv  occupation_ Sales  occupation_ Tech-support  \\\n",
       "0                            0                  0                         0   \n",
       "1                            0                  0                         0   \n",
       "2                            0                  0                         0   \n",
       "3                            0                  0                         0   \n",
       "4                            0                  0                         0   \n",
       "\n",
       "   occupation_ Transport-moving  income_ <=50K  income_ >50K  \n",
       "0                             0              1             0  \n",
       "1                             0              1             0  \n",
       "2                             0              1             0  \n",
       "3                             0              1             0  \n",
       "4                             0              1             0  \n",
       "\n",
       "[5 rows x 46 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_dummies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X.shape:(32561, 44) y.shape:(32561,)\n"
     ]
    }
   ],
   "source": [
    "features = data_dummies.loc[:,'age':'occupation_ Transport-moving']\n",
    "# 提取numpy数组\n",
    "X=features.values\n",
    "y=data_dummies['income_ >50K'].values\n",
    "print(f\"X.shape:{X.shape} y.shape:{y.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test score:0.81\n"
     ]
    }
   ],
   "source": [
    "# 调用LogisticRegression\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0)\n",
    "logreg=LogisticRegression(max_iter=3000)\n",
    "logreg.fit(X_train,y_train)\n",
    "print(f\"Test score:{round(logreg.score(X_test,y_test),2)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数字可以编码分类变量\n",
    "<P>有时候,分类变量会用数字来表示,比如高中三个年级用0 1 2表示。这时候我们需要在ge_dummies中显示调用各列，也可以将用数字表示的分类特征转化为str类型</P>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Integer Feature</th>\n",
       "      <th>Categorical Feature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>socks</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>fox</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>socks</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>box</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Integer Feature Categorical Feature\n",
       "0                0               socks\n",
       "1                1                 fox\n",
       "2                2               socks\n",
       "3                1                 box"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 创建一个数据框,包含一个整数特征和一个分类字符串特征\n",
    "demo_df = pd.DataFrame({'Integer Feature':[0,1,2,1],'Categorical Feature':['socks','fox','socks','box']})\n",
    "display(demo_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Integer Feature</th>\n",
       "      <th>Categorical Feature_box</th>\n",
       "      <th>Categorical Feature_fox</th>\n",
       "      <th>Categorical Feature_socks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Integer Feature  Categorical Feature_box  Categorical Feature_fox  \\\n",
       "0                0                        0                        0   \n",
       "1                1                        0                        1   \n",
       "2                2                        0                        0   \n",
       "3                1                        1                        0   \n",
       "\n",
       "   Categorical Feature_socks  \n",
       "0                          1  \n",
       "1                          0  \n",
       "2                          1  \n",
       "3                          0  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.get_dummies(demo_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Integer Feature_0</th>\n",
       "      <th>Integer Feature_1</th>\n",
       "      <th>Integer Feature_2</th>\n",
       "      <th>Categorical Feature_box</th>\n",
       "      <th>Categorical Feature_fox</th>\n",
       "      <th>Categorical Feature_socks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Integer Feature_0  Integer Feature_1  Integer Feature_2  \\\n",
       "0                  1                  0                  0   \n",
       "1                  0                  1                  0   \n",
       "2                  0                  0                  1   \n",
       "3                  0                  1                  0   \n",
       "\n",
       "   Categorical Feature_box  Categorical Feature_fox  Categorical Feature_socks  \n",
       "0                        0                        0                          1  \n",
       "1                        0                        1                          0  \n",
       "2                        0                        0                          1  \n",
       "3                        1                        0                          0  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# demo_df['Integer Feature'] = demo_df['Integer Feature'].astype(str)\n",
    "pd.get_dummies(demo_df,columns=['Integer Feature','Categorical Feature']) # 利用columns显示调用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Integer Feature_0</th>\n",
       "      <th>Integer Feature_1</th>\n",
       "      <th>Integer Feature_2</th>\n",
       "      <th>Categorical Feature_box</th>\n",
       "      <th>Categorical Feature_fox</th>\n",
       "      <th>Categorical Feature_socks</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Integer Feature_0  Integer Feature_1  Integer Feature_2  \\\n",
       "0                  1                  0                  0   \n",
       "1                  0                  1                  0   \n",
       "2                  0                  0                  1   \n",
       "3                  0                  1                  0   \n",
       "\n",
       "   Categorical Feature_box  Categorical Feature_fox  Categorical Feature_socks  \n",
       "0                        0                        0                          1  \n",
       "1                        0                        1                          0  \n",
       "2                        0                        0                          1  \n",
       "3                        1                        0                          0  "
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "demo_df['Integer Feature'] = demo_df['Integer Feature'].astype(str)\n",
    "pd.get_dummies(demo_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
